---
title: "Cleaning Afrobarometer"
---

# Load

```{r}
# load packages (via the shared helper script)
  source("helper-packages.R")

# round 6 raw file (SPSS .sav), read with an explicit latin1 encoding
  afro_6_raw <- read_sav(
    "../raw-data/y-multi-afrobarometer/round-6/merged_r6_data_2016_36countries2.sav",
    encoding = "latin1"
  )

# round 7 raw file (SPSS .sav), read with the default encoding
  afro_7_raw <- read_sav(
    "../raw-data/y-multi-afrobarometer/round-7/r7_merged_data_34ctry.release.sav"
  )
```

# Clean Round 1

- NOT PROCESSED:
  - REASONS:
    - No questions about religious intolerance

# Clean Round 2

- NOT PROCESSED:
  - REASONS:
    - No questions about religious intolerance

# Clean Round 3

- NOT PROCESSED:
  - REASONS:
    - No questions about religious intolerance

# Clean Round 4

- NOT PROCESSED:
  - REASONS:
    - No questions about religious intolerance

# Clean Round 5

- NOT PROCESSED:
  - REASONS:
    - No questions about religious intolerance

# Clean Round 6

```{r}
# clean round 6: derive the harmonised resp_* columns from the raw file and
# keep only those columns
  clean_af6 <- 
    afro_6_raw %>% 
    mutate(
      
    #########################  
    ####### META-DATA #######  
    #########################      
      
      # source name (character vector, title case)
        resp_source = "Afrobarometer",
        
      # round number (character vector, title case)  
        resp_round = "Round 6",
      
      # url to dataset source, where publicly available (character vector)
        resp_original_data_url = "bit.ly/3POesJs",

      # survey mode (in-person/phone/internet)
        resp_survey_mode = "in-person",    

      # country (character vector; COUNTRY converted with to_character(), except
      # code 25, which is written out by hand as "Sao Tome and Principe")
        resp_country_original = 
          case_when(
            COUNTRY == 25 ~ "Sao Tome and Principe",
            TRUE ~ to_character(COUNTRY)),

      # country (character vector; converts to countrycode country.name list)
        resp_country_common = 
          countryname(resp_country_original),
        
      # interview date (variable of class Date; if only month given, input 1st of month)
        resp_interview_date = DATEINTR,
 
    #########################  
    ##### DEMOGRAPHICS ######  
    #########################

      # respondent's religion (character vector that corresponds to master list);
      # Q98A codes not listed below fall through to NA
        resp_religion =
          case_when(
            Q98A %in% c((1:17), (30:33), 35, 36, 100, 220, 260, 300, 420, (460:463), 540, 541, 820, 822, 860, 1260) ~ "Christian",
            Q98A %in% c(34) ~ "Jewish",
            Q98A %in% c((18:24), (500:503), 620, 660, 930, 931) ~ "Muslim",
            Q98A %in% c(26) ~ "Hindu",
            Q98A %in% c(27) ~ "Other religion", # bahai
            Q98A %in% c(25, 421, 422, 9995) ~ "Other religion",
            TRUE ~ NA_character_),
      
      # respondent's denomination (character vector; Q98A converted with to_character())
        resp_denomination = to_character(Q98A),      

      # respondent's age (character vector; bins denoted by single dash ["18-25"])
      # NOTE(review): "98" is set to NA here but not in round 7 -- confirm it is a
      # non-response code in this round and not a valid age of 98
        resp_age =
          dplyr::recode(
            as.character(Q1),
            "98" = NA_character_,
            "998" = NA_character_,
            "999" = NA_character_, 
            "-1" = NA_character_),        
      
      # respondent's education level (label with harmonised bucket in brackets;
      # any code not listed becomes NA via .default)
        resp_education_original =
          dplyr::recode(
            as.character(Q97),
            "0" = "1. No formal schooling [No education]",
            "1" = "2. Informal schooling only (including Koranic schooling) [No education]",
            "2" = "3. Some primary schooling [No education]",
            "3" = "4. Primary school completed [Primary]",
            "4" = "5. Intermediate school or Some secondary school / high school [Primary]",
            "5" = "6. Secondary school / high school completed [Primary]",
            "6" = "7. Post-secondary qualifications other than university e.g. a diploma or degree from a polytechnic or college [Primary]",
            "7" = "8. Some university [Primary]",
            "8" = "9. University completed [College]",
            "9" = "10. Post graduate [College]",
            "99" = NA_character_,
            "98" = NA_character_,
            "-1" = NA_character_,
            .default = NA_character_),      
      
      # respondent's gender (numeric: female = 1; male = 0; other = NA)
        resp_female =
          case_when(
            Q101 == 1 ~ 0,
            Q101 == 2 ~ 1,
            TRUE ~ NA_real_),
    
      # respondent resident in rural (vs urban) area (numeric: rural = 1; urban/semi-urban/peri-urban = 0)
      # NOTE(review): confirm against the codebook that Q115 is the respondent's
      # urban/rural location and that 1 = rural
        resp_rural =
          case_when(
            Q115 == 1 ~ 1,
            Q115 %in% c(2, 3) ~ 0,
            TRUE ~ NA_real_),

    #########################  
    ### SOCIAL DISTANCE 1 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_1_qinfo = "NUM: Q89A; QTEXT: For each of the following types of people, please tell me whether you would like having people from this group as neighbors, dislike it, or not care: People of different religion.; ROPTIONS: 1 = Strongly dislike [=1] + 2 = Somewhat dislike [=1] +  3 = Would not care [=0] + 4 = Somewhat like [=0] + 5 = Strongly like [=0]; TARGET: Different religion; TYPE: Distance, neighbor",
      
      # original response (as character vector; codes -1, 9 and 98 set to NA)
        resp_soc_dist_1_original = 
          dplyr::recode(
            as.character(Q89A),
            "-1" = NA_character_,
            "9" = NA_character_,
            "98" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_1_bin_recode = 
          case_when(
            Q89A %in% c(1:2) ~ 1,
            Q89A %in% c(3:5) ~ 0,
            TRUE ~ NA_real_),
    
    ############################  
    ### GENERAL SOCIAL TRUST ###  
    ############################
    
      # placeholders are typed NAs so the columns keep the documented types
      # (character / numeric) instead of defaulting to logical
    
      # original question number; question text; response options (input above)
        resp_gentrust_qinfo = NA_character_, # checked; no general trust question

      # original response (as character vector)
        resp_gentrust_original = NA_character_,       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_gentrust_bin_recode = NA_real_,
    
    #########################  
    ##### RELIGIOSITY #######  
    #########################
    
      # original question number; question text; response options (input above)
        resp_religiosity_qinfo = "NUM: Q98B; QTEXT: People practice their religion in different ways. Aside from weddings and funerals, how often do you personally engage in religious practices like prayer, reading a religious book, or attending a religious service or a meeting of a religious group?; ROPTIONS: 0 = Never + 1 = A few times a year + 2 = About once a month + 3 = About once a week + 4 = A few times a week + 5 = About once a day + 6 = More than once a day + 7 = Respondent has no religion",
  
      # original response (as numeric vector, with non-substantive responses coded as NA_real_)
        resp_religiosity_original = 
          dplyr::recode(
            as.numeric(Q98B),
            `-1` = NA_real_,
            `9` = NA_real_,
            `98` = NA_real_),       

      # recode (numeric: scaled 0-1, where 1 is more religious);
      # code 7 (respondent has no religion) is scored 0; NA stays NA
        resp_religiosity_recode =
          case_when(
            resp_religiosity_original != 7 ~ resp_religiosity_original/6,
            resp_religiosity_original == 7 ~ 0
          )
        
    ) %>% 
    select(starts_with("resp_"))
```

# Clean Round 7

```{r}
# clean round 7: derive the harmonised resp_* columns from the raw file and
# keep only those columns
  clean_af7 <- 
    afro_7_raw %>% 
    mutate(
      
    #########################  
    ####### META-DATA #######  
    #########################      
      
      # source name (character vector, title case)
        resp_source = "Afrobarometer",
        
      # round number (character vector, title case)  
        resp_round = "Round 7",
      
      # url to dataset source, where publicly available (character vector)
        resp_original_data_url = "bit.ly/3POesJs",

      # survey mode (in-person/phone/internet)
        resp_survey_mode = "in-person",    

      # country (character vector; list of countries as written in original source)
        resp_country_original = to_character(COUNTRY),
    
      # country (character vector; converts to countrycode country.name list)
        resp_country_common = countryname(resp_country_original),
    
      # interview date (variable of class Date; if only month given, input 1st of month)
      # NOTE(review): DATEINTR is passed through unparsed; an earlier draft left a
      # "%d%m%y" format fragment here -- confirm the column is read in as Date
        resp_interview_date = DATEINTR,
   
    #########################  
    ##### DEMOGRAPHICS ######  
    #########################
      
      # respondent's religion (character vector that corresponds to master list);
      # Q98 codes not listed below fall through to NA
        resp_religion =
          case_when(
            Q98 %in% c((1:17), (30:33), 100, 101, 220, 260, 460, 540, 541, 700, (742:745), (821:824), 860, 1220, (1620:1622), (1660:1662), (1700:1702)) ~ "Christian",
            Q98 %in% c(34) ~ "Jewish",
            Q98 %in% c((18:24), 181, 500, 620, 660, 740, 931, 1100) ~ "Muslim",
            Q98 %in% c(26) ~ "Hindu",
            Q98 %in% c(27) ~ "Other religion", # bahai            
            Q98 %in% c(25, 9995) ~ "Other religion",
            TRUE ~ NA_character_),

      # respondent's denomination (character vector; Q98 converted with to_character())
        resp_denomination = to_character(Q98),

      # respondent's age (character vector; bins denoted by single dash ["18-25"])
        resp_age =
          dplyr::recode(
            as.character(Q1),
            "998" = NA_character_,
            "999" = NA_character_, 
            "-1" = NA_character_,
            "99999" = NA_character_),        
      
      # respondent's education level (label with harmonised bucket in brackets;
      # any code not listed becomes NA via .default)
        resp_education_original =
          dplyr::recode(
            as.character(Q97),
            "0" = "0. No formal schooling [No education]",
            "1" = "1. Informal schooling only (including Koranic schooling) [No education]",
            "2" = "2. Some primary schooling [No education]",
            "3" = "3. Primary school completed [Primary]",
            "4" = "4. Intermediate school or Some secondary school / high school [Primary]",
            "5" = "5. Secondary school / high school completed [Primary]",
            "6" = "6. Post-secondary qualifications, other than university e.g. a diploma or degree from a polytechnic or college [Primary]",
            "7" = "7. Some university [Primary]",
            "8" = "8. University completed [College]",
            "9" = "9. Post-graduate [College]",
            .default = NA_character_),    
   
      # respondent's gender (numeric: female = 1; male = 0; other = NA)
        resp_female =
          case_when(
            Q101 == 1 ~ 0,
            Q101 == 2 ~ 1,
            TRUE ~ NA_real_),
      
      # respondent resident in rural (vs urban) area (numeric: rural = 1; urban/semi-urban/peri-urban = 0)
      # NOTE(review): confirm against the codebook that Q115 is the respondent's
      # urban/rural location and that 1 = rural
        resp_rural =
          case_when(
            Q115 == 1 ~ 1,
            Q115 %in% c(2, 3) ~ 0,
            TRUE ~ NA_real_),

    #########################  
    ### SOCIAL DISTANCE 1 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_1_qinfo = "NUM: Q87A; QTEXT: For each of the following types of people, please tell me whether you would like having people from this group as neighbors, dislike it, or not care: People of different religion.; ROPTIONS: 1 = Strongly dislike [=1] + 2 = Somewhat dislike [=1] + 3 = Would not care [=0] + 4 = Somewhat like [=0] + 5 = Strongly like [=0]; TARGET: Different religion; TYPE: Distance, neighbor",
      
      # original response (as character vector; codes -1, 8 and 9 set to NA).
      # "-1" is included so this column agrees with the binary recode below,
      # which already maps every value outside 1-5 to NA
        resp_soc_dist_1_original = 
          dplyr::recode(
            as.character(Q87A),
            "-1" = NA_character_,
            "9" = NA_character_,
            "8" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_1_bin_recode = 
          case_when(
            Q87A %in% c(1:2) ~ 1,
            Q87A %in% c(3:5) ~ 0,
            TRUE ~ NA_real_),
   
   ############################  
   ### GENERAL SOCIAL TRUST ###  
   ############################
    
      # placeholders are typed NAs so the columns keep the documented types
      # (character / numeric) instead of defaulting to logical
    
      # original question number; question text; response options (input above)
        resp_gentrust_qinfo = NA_character_, # checked; no general trust question

      # original response (as character vector)
        resp_gentrust_original = NA_character_,       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_gentrust_bin_recode = NA_real_,
    
    #########################  
    ##### RELIGIOSITY #######  
    #########################
    
      # original question number; question text; response options (input above)
        resp_religiosity_qinfo = NA_character_, # checked; no religiosity question
  
      # original response (as numeric vector, with non-substantive responses coded as NA_real_)
        resp_religiosity_original = NA_real_,       

      # recode (numeric: scaled 0-1, where 1 is more religious)
        resp_religiosity_recode = NA_real_
        
    ) %>% 
    select(starts_with("resp_"))
```

# Stack dataframes

```{r}
# stack: round 6 rows first, then round 7 rows, matched by column name
  stacked <- bind_rows(clean_af6, clean_af7)
```

# Save data

```{r}
  # write the stacked round 6 + round 7 data to the cleaned-data folder
  saveRDS(
    object = stacked,
    file = "../cleaned-data/y-18-multi-afrobarometer.rds"
  )
```
